%Benchmark the fft2 function on the GPU versus the CPU. There is a lot of
%overhead in data transfer between host memory and the GPU, so only a
%moderate performance improvement was observed. Probably good for offline
%analysis or for reducing CPU workload.

% Problem size: each transform is transformsize-by-transformsize, and
% `repeats` such pages are stacked along the third dimension.
transformsize=10240;
repeats=1;

% Acquire a handle to the GPU device. The handle is needed by both reset()
% and wait(); reuse a `cuda` variable already present in the workspace,
% otherwise fall back to the currently selected device.
if ~exist('cuda','var')
    cuda=gpuDevice;
end
% Reset the device before any gpuArray is created, because a reset
% invalidates all gpuArray data that already lives on it.
reset(cuda);

% Same single-precision random input for both runs: s1 on the GPU, s2 on
% the host.
A=rand(transformsize,transformsize,repeats,'single');
s1=gpuArray(A);
s2=A;

for i=1:10
    % --- GPU run ---
    tic;
    r1=(fft2(s1)+1.2).*5;
    % Block until the device has finished so that toc measures the
    % completed GPU computation and not just the time to queue it.
    wait(cuda)
    % The result is deliberately left on the GPU; uncomment the gather to
    % include the device-to-host transfer in the measurement.
    %r=gather(r1);
    time1=toc;

    % --- CPU run (same expression on the host copy) ---
    tic;
    r1=(fft2(s2)+1.2).*5;
    time2=toc;

    % Ratio > 1 means the GPU run was faster. Left unsuppressed on purpose
    % so the value is printed on every iteration.
    speedup=time2/time1
end